Topics:

  1. marginal effects – margins

  2. robust / resistant regression – robustbase

  3. sample selection models – sampleSelection
  4. spatial analysis – tmap
install.packages("margins")
install.packages("robustbase")
install.packages("sampleSelection")
install.packages("tmap")
library(robustbase)
library(sampleSelection)
library(readxl)
library(tidyverse)
library(olsrr)
library(sandwich)
library(lmtest)
library(margins)
library(tmap)
library(sf)

\[ y = \beta_1 + \beta_2X_1 + \beta_3X_1X_2 \]

\[ ME(X_1) = \beta_2 + \beta_3X_2 \] \[ ME_i(X_1) = \beta_2 + \beta_3X_{2i} \]

AME – average marginal effects

\[ \overline{ME_i(X_1)} = \frac{\sum_i \hat{\beta}_2 + \hat{\beta}_3X_{2i}}{n} \]

MEM – marginal effects at means

\[ ME_i(X_1) = \hat{\beta}_2 + \hat{\beta}_3\bar{X}_{2i} \] MER – marginal effect at representative case

\[ ME_i(X_1) = \hat{\beta}_2 + \hat{\beta}_3X_{2{i=k}} \]

a*b = a + b + a:b

ggplot(data = mtcars, aes(x = wt, y =mpg, color = factor(am), group = factor(am))) + 
  geom_point() +
  geom_smooth(method = "lm", se =F)
`geom_smooth()` using formula 'y ~ x'

example1 <- lm(formula = mpg ~ am + cyl + gear + wt + hp + am*hp + am*cyl + am*wt + am*gear,
               data = mtcars)
summary(example1)

Call:
lm(formula = mpg ~ am + cyl + gear + wt + hp + am * hp + am * 
    cyl + am * wt + am * gear, data = mtcars)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.4821 -1.4343 -0.4886  1.3113  5.3861 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept) 30.3914607  8.4489862   3.597   0.0016 **
am          25.1563689 14.6860051   1.713   0.1008   
cyl         -0.6242845  0.7465580  -0.836   0.4120   
gear         0.5529184  1.8261784   0.303   0.7649   
wt          -1.8343640  0.9987027  -1.837   0.0798 . 
hp          -0.0235150  0.0212957  -1.104   0.2814   
am:hp        0.0336122  0.0346312   0.971   0.3423   
am:cyl      -0.0007546  1.3664746  -0.001   0.9996   
am:wt       -6.5358470  2.6823552  -2.437   0.0234 * 
am:gear     -2.6243563  2.9210663  -0.898   0.3787   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.369 on 22 degrees of freedom
Multiple R-squared:  0.8904,    Adjusted R-squared:  0.8455 
F-statistic: 19.85 on 9 and 22 DF,  p-value: 1.369e-08
margins(example1)
Average marginal effects
lm(formula = mpg ~ am + cyl + gear + wt + hp + am * hp + am *     cyl + am * wt + am * gear, data = mtcars)
summary(margins(example1))
pzn_rent <- read_excel("data/rent-poznan.xlsx")

set.seed(123)
pzn_rent_subset <- pzn_rent %>%
  add_count(quarter, name = "quarter_count") %>%
  filter(quarter_count >= 50) %>%
  filter(price >= 500, price <= 15000, flat_area >= 15, flat_area <= 250) %>%
  sample_n(3000)
  
pzn_rent_subset
model_pzn <- lm(formula = price ~ flat_area + flat_rooms + individual + flat_furnished + 
                  flat_for_students +  flat_balcony,
                data = pzn_rent_subset)
summary(model_pzn)

Call:
lm(formula = price ~ flat_area + flat_rooms + individual + flat_furnished + 
    flat_for_students + flat_balcony, data = pzn_rent_subset)

Residuals:
    Min      1Q  Median      3Q     Max 
-5043.1  -276.5   -53.7   223.2  9772.2 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)            553.5873    30.9729  17.873  < 2e-16 ***
flat_area               20.7297     0.8218  25.226  < 2e-16 ***
flat_rooms              85.4827    19.8450   4.308 1.70e-05 ***
individualTRUE          24.8742    25.6332   0.970  0.33193    
flat_furnishedTRUE     138.6813    25.5310   5.432 6.02e-08 ***
flat_for_studentsTRUE -169.2845    25.6333  -6.604 4.71e-11 ***
flat_balconyTRUE        64.1760    20.6216   3.112  0.00188 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 544.6 on 2993 degrees of freedom
Multiple R-squared:  0.4227,    Adjusted R-squared:  0.4215 
F-statistic: 365.2 on 6 and 2993 DF,  p-value: < 2.2e-16
plot(model_pzn)

In order to verify whether given observation is influential the following approach is taken

  1. save estimated betas from the model based on the whole dataset
  2. remove i-th observation from the data and estimate parameters on a reduced dataset

\[ dfbetas_{k,-i} = \frac{\hat{\beta}_k - \hat{\beta}_{k,-i}}{se(\hat{\beta}_{k,-i})} \]

Cook’s distance measure

How can we deal with influential observations?

The main difference between standard (non-robust) and robust methods is the way how loss function is calculated.

model_pzn_rob <- lmrob(formula = price ~ flat_area + flat_rooms + individual + flat_furnished + 
                                  flat_for_students +  flat_balcony,
                       data = pzn_rent_subset,
                       method = "MM")
summary(model_pzn_rob)

Call:
lmrob(formula = price ~ flat_area + flat_rooms + individual + flat_furnished + 
    flat_for_students + flat_balcony, data = pzn_rent_subset, method = "MM")
 \--> method = "MM"
Residuals:
     Min       1Q   Median       3Q      Max 
-3705.20  -217.14   -13.43   255.22  9774.74 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)            752.428     27.813  27.053  < 2e-16 ***
flat_area               12.665      1.162  10.902  < 2e-16 ***
flat_rooms             139.919     19.646   7.122 1.33e-12 ***
individualTRUE          41.007     16.476   2.489   0.0129 *  
flat_furnishedTRUE      84.644     17.560   4.820 1.51e-06 ***
flat_for_studentsTRUE  -85.358     15.842  -5.388 7.67e-08 ***
flat_balconyTRUE        74.079     14.368   5.156 2.69e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Robust residual standard error: 344.6 
Multiple R-squared:  0.4267,    Adjusted R-squared:  0.4255 
Convergence in 20 IRWLS iterations

Robustness weights: 
 53 observations c(176,198,199,208,260,284,411,540,601,629,664,684,698,723,743,761,786,851,909,943,1242,1277,1328,1350,1358,1379,1449,1460,1609,1622,1841,1889,1898,1900,2062,2080,2120,2258,2297,2301,2400,2424,2445,2463,2496,2557,2625,2663,2749,2816,2883,2909,2975)
     are outliers with |weight| = 0 ( < 3.3e-05); 
 240 weights are ~= 1. The remaining 2707 ones are summarized as
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0000354 0.8664000 0.9523000 0.8848000 0.9856000 0.9990000 
Algorithmic parameters: 
       tuning.chi                bb        tuning.psi        refine.tol           rel.tol         scale.tol         solve.tol       eps.outlier 
        1.548e+00         5.000e-01         4.685e+00         1.000e-07         1.000e-07         1.000e-10         1.000e-07         3.333e-05 
            eps.x warn.limit.reject warn.limit.meanrw 
        4.547e-10         5.000e-01         5.000e-01 
     nResample         max.it       best.r.s       k.fast.s          k.max    maxit.scale      trace.lev            mts     compute.rd fast.s.large.n 
           500             50              2              1            200            200              0           1000              0           2000 
                  psi           subsampling                   cov compute.outlier.stats 
           "bisquare"         "nonsingular"         ".vcov.avar1"                  "SM" 
seed : int(0) 
plot(x = model_pzn_rob$model$price, y = model_pzn_rob$rweights, xlab = "Price", ylab = "Weight (lmrob)")

Econometricians often use the “robust” term to describe standard errors that are robust to heteroskedasticity, temporal correlation, or clustering. Statisticians often use the “robust” term to describe a model that is robust to influential observations.

Post-hoc sensitivity analysis:

Sample selection models:

  1. basic selection model: Heckman’s model
  2. more advanced econometric models: copula selection models
  3. more statistical approach: not missing at random models

Model without selection bias (subset of people who are in the labour force)

m_outcome <- lm(formula = wage ~ educ, data = Mroz87, subset  = lfp ==1)
summary(m_outcome)

Call:
lm(formula = wage ~ educ, data = Mroz87, subset = lfp == 1)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.6797 -1.6658 -0.4556  0.8794 21.1487 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -2.09237    0.84829  -2.467    0.014 *  
educ         0.49531    0.06595   7.511 3.49e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.114 on 426 degrees of freedom
Multiple R-squared:  0.1169,    Adjusted R-squared:  0.1149 
F-statistic: 56.41 on 1 and 426 DF,  p-value: 3.486e-13
data(Mroz87)
m <- selection(selection = lfp ~ educ + age + kids5 + kids618 + nwifeinc,
               outcome  = wage  ~ educ, 
               data = Mroz87, 
               method = "ml")
summary(m)
--------------------------------------------
Tobit 2 model (sample selection model)
Maximum Likelihood estimation
Newton-Raphson maximisation, 7 iterations
Return code 8: successive function values within relative tolerance limit (reltol)
Log-Likelihood: -1478.116 
753 observations (325 censored and 428 observed)
10 free parameters (df = 743)
Probit selection equation:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.489477   0.308468  -4.829 1.67e-06 ***
educ         0.163732   0.020126   8.136 1.72e-15 ***
age         -0.005492   0.003707  -1.482 0.138839    
kids5       -0.167565   0.052057  -3.219 0.001343 ** 
kids618      0.016332   0.020581   0.794 0.427724    
nwifeinc    -0.007270   0.002115  -3.437 0.000621 ***
Outcome equation:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -6.77655    0.96767  -7.003 5.62e-12 ***
educ         0.66434    0.07535   8.817  < 2e-16 ***
   Error terms:
      Estimate Std. Error t value Pr(>|t|)    
sigma 4.153999   0.166485   24.95   <2e-16 ***
rho   0.989398   0.004279  231.24   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
--------------------------------------------

Generate sample data

set.seed(123)
n <- 1000 
x1 <- rnorm(n = n, mean = 0, sd = 1)
x2 <- rnorm(n = n, mean = 0, sd = 1)
errors <- MASS::mvrnorm(n = n, mu = c(0, 0), 
                        Sigma = matrix(c(1, 0.5, 0.5, 2), nrow=2),
                        empirical = T)

selmodel <- data.frame(r = 1 + x1 + x2 + errors[, 1],
                       y = 1 + x1 + errors[, 2])

selmodel$sel <- selmodel$r > 0

selection(selection = sel ~ x1 + x2,outcome = y ~ x1, data = selmodel) |> 
  summary()

Spatial

df_salaries <- read_excel("data/data-salaries.xlsx", sheet = 2) %>%
  dplyr::select(id = Kod, name = Nazwa, salaries = Wartosc)

df_real <- read_excel("data/data-real-estate.xlsx", sheet = 2) %>%
  dplyr::select(id = Kod, name = Nazwa, real = Wartosc)

df_model <- df_salaries %>%
  inner_join(df_real) %>%
  filter(real > 0) %>%
  mutate(woj = substr(id,1,2),
         cities = str_detect(name, "m\\."),
         capitals = str_detect(name, "Białystok|Bydgoszcz|Gdańsk|Gorzów Wielkopolski|Katowice|Kielce|Kraków|Lublin|Łódź|Olsztyn|Opole|Poznań|Rzeszów|Szczecin|Toruń|Warszawa|Wrocław|Zielona Góra"))
Joining, by = c("id", "name")
df_model %>%
  filter(capitals)
NA
plot(log(df_model$salaries), log(df_model$real))

cor((df_model$salaries), (df_model$real))
[1] 0.4726012
model1 <- lm(formula = log(real) ~ log(salaries) + woj + capitals + cities, data = df_model)
df_model$resids <- resid(model1)
plot(model1)

summary(model1)

Call:
lm(formula = log(real) ~ log(salaries) + woj + capitals + cities, 
    data = df_model)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.6441 -0.1337 -0.0074  0.1165  0.9212 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)    0.37045    1.09041   0.340 0.734256    
log(salaries)  0.89737    0.12805   7.008 1.20e-11 ***
woj04          0.06667    0.06230   1.070 0.285261    
woj06          0.19529    0.06107   3.198 0.001508 ** 
woj08         -0.00429    0.07159  -0.060 0.952252    
woj10          0.20641    0.06053   3.410 0.000724 ***
woj12          0.40529    0.06193   6.544 2.07e-10 ***
woj14          0.22529    0.05235   4.304 2.17e-05 ***
woj16         -0.12258    0.07463  -1.643 0.101354    
woj18          0.15532    0.06110   2.542 0.011439 *  
woj20          0.07913    0.06657   1.189 0.235344    
woj22          0.37975    0.06303   6.025 4.19e-09 ***
woj24          0.04039    0.05597   0.722 0.471035    
woj26          0.08760    0.07149   1.225 0.221230    
woj28          0.09229    0.06393   1.443 0.149757    
woj30          0.14759    0.05533   2.668 0.007986 ** 
woj32          0.18194    0.06341   2.869 0.004359 ** 
capitalsTRUE   0.32291    0.06452   5.005 8.79e-07 ***
citiesTRUE     0.12809    0.03840   3.335 0.000941 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2178 on 359 degrees of freedom
Multiple R-squared:  0.4776,    Adjusted R-squared:  0.4514 
F-statistic: 18.23 on 18 and 359 DF,  p-value: < 2.2e-16
powiats <- st_read(dsn = "data/mapy/powiaty.shp")
Reading layer `powiaty' from data source `/Users/berenz/git/dydaktyka/applied-econometrics/data/mapy/powiaty.shp' using driver `ESRI Shapefile'
Simple feature collection with 380 features and 30 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 171677.6 ymin: 133223.7 xmax: 861895.7 ymax: 774923.7
CRS:           NA
woj <- st_read(dsn = "data/mapy/woj.dbf")
Reading layer `woj' from data source `/Users/berenz/git/dydaktyka/applied-econometrics/data/mapy/woj.dbf' using driver `ESRI Shapefile'
Simple feature collection with 16 features and 30 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 171677.6 ymin: 133223.7 xmax: 861882.2 ymax: 774923.7
CRS:           NA
plot(powiats$geometry)
plot(woj$geometry, add = T, lwd = 2)

powiats %>% 
  dplyr::select(id=jpt_kod_je) %>%
  left_join(df_model %>%
              mutate(id = substr(id, 1,4))) -> for_plot
Joining, by = "id"
tm_shape(for_plot) +
  tm_polygons(col = "resids", style = "jenks", midpoint = 0) +
  tm_shape(woj) + 
  tm_borders(lwd = 2)
Warning: The projection of the shape object for_plot is not known, while it seems to be projected.
Warning: Current projection of shape for_plot unknown and cannot be determined.
Warning: Current projection of shape woj unknown and cannot be determined.

Generating correlated data

library(MASS)
m <- 2
sigma <- diag(c(1,2), 2, 2)
sigma[1,2] <- sigma[2,1] <- 0.5
fake_data <- MASS::mvrnorm(n = 2000, mu=rep(0, m), Sigma = sigma, empirical = T)
cor(fake_data)
plot(fake_data)
---
title: "R Notebook"
output: html_notebook
---

Topics:

1. marginal effects -- margins

2. robust / resistant regression -- robustbase 

- influential obs -- change slopes (parameters) / have a significant effect on our estimates
- outliers -- obs that have a high / large residual but do not have an effect on our estimates

3. sample selection models -- sampleSelection
4. spatial analysis tmap


```{r}
# Install the packages used in this notebook, but only the ones that are
# missing -- re-running the notebook should not re-download everything.
pkgs <- c("margins", "robustbase", "sampleSelection", "tmap")
missing <- pkgs[!vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)]
if (length(missing) > 0) {
  install.packages(missing)
}
```


```{r}
# Attach all packages up front. Order matters for masking: tidyverse is
# loaded after robustbase/sampleSelection so dplyr verbs win name clashes.
library(robustbase)       # lmrob() -- MM-type robust regression
library(sampleSelection)  # selection() -- Heckman-type models; Mroz87 data
library(readxl)           # read_excel()
library(tidyverse)        # dplyr / ggplot2 / stringr pipelines
library(olsrr)            # influence diagnostics (dfbetas, Cook's distance)
library(sandwich)         # robust (HC/HAC) covariance estimators
library(lmtest)           # coeftest() and related inference helpers
library(margins)          # average marginal effects for lm/glm
library(tmap)             # thematic maps
library(sf)               # simple-features spatial data (st_read)
```


$$
y = \beta_1 + \beta_2X_1 + \beta_3X_1X_2
$$

$$
ME(X_1) = \beta_2 + \beta_3X_2
$$
$$
ME_i(X_1) = \beta_2 + \beta_3X_{2i}
$$

AME -- average marginal effects

$$
\overline{ME_i(X_1)} = \frac{\sum_i \hat{\beta}_2 + \hat{\beta}_3X_{2i}}{n}
$$

MEM -- marginal effects at means

$$
ME_i(X_1) = \hat{\beta}_2 + \hat{\beta}_3\bar{X}_{2i}
$$
MER -- marginal effect at representative case


$$
ME_i(X_1) = \hat{\beta}_2 + \hat{\beta}_3X_{2{i=k}}
$$
```{r}
head(mtcars)
```

a*b = a + b + a:b

```{r}
# Scatter of weight vs mpg with a separate OLS line per transmission type.
# se = FALSE (never the reassignable shortcut F); the smoothing formula is
# spelled out to silence the `geom_smooth()` using formula 'y ~ x' message.
ggplot(data = mtcars, aes(x = wt, y = mpg, color = factor(am), group = factor(am))) + 
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE)
```

```{r}
# Interaction model: all slopes are allowed to differ by transmission (am).
# The original spelled the interactions as am*hp etc., which re-adds main
# effects already listed; using am:hp (pure interaction) yields an identical
# model matrix and identical coefficient labels, without the redundancy.
example1 <- lm(formula = mpg ~ am + cyl + gear + wt + hp +
                 am:hp + am:cyl + am:wt + am:gear,
               data = mtcars)
summary(example1)
```


```{r}
# Average marginal effects (AME) for example1. Compute the margins object
# once and reuse it -- margins() refits per-observation effects and is the
# expensive step; the printed output is unchanged.
marg_ex1 <- margins(example1)
marg_ex1
summary(marg_ex1)
```


```{r}
# Load Poznan rental offers, keep quarters with at least 50 offers and
# plausible price / area ranges, then draw a reproducible sample of 3000.
pzn_rent <- read_excel("data/rent-poznan.xlsx")

set.seed(123)
pzn_rent_subset <- pzn_rent %>%
  add_count(quarter, name = "quarter_count") %>%
  filter(quarter_count >= 50,
         price >= 500, price <= 15000,
         flat_area >= 15, flat_area <= 250) %>%
  sample_n(3000)

pzn_rent_subset
```

```{r}
# Baseline (non-robust) OLS hedonic model of monthly rent.
model_pzn <- lm(
  price ~ flat_area + flat_rooms + individual + flat_furnished +
    flat_for_students + flat_balcony,
  data = pzn_rent_subset
)
summary(model_pzn)
plot(model_pzn)
```
In order to verify whether given observation is influential the following approach is taken

1. save estimated betas from the model based on the whole dataset
2. remove i-th observation from the data and estimate parameters on a reduced dataset

$$
dfbetas_{k,-i} = \frac{\hat{\beta}_k - \hat{\beta}_{k,-i}}{se(\hat{\beta}_{k,-i})}
$$


```{r}
ols_plot_dfbetas(model_pzn)
```
Cook's distance measure

```{r}
ols_plot_cooksd_chart(model_pzn)
```

How can we deal with influential observations? 

- remove them but what is the reason to remove some data from your analysis? 
- use methods that are robust to influential observations

The main difference between standard (non-robust)  and robust methods is the way how loss function is calculated. 

- non-robust linear regression - non-weighted loss function (sum of squares)
- robust linear regression -- weighted loss function (weighted sum of squares)


```{r}
# Robust MM-regression: same specification as model_pzn, but observations
# are downweighted by the (bisquare) loss, so outliers barely move the fit.
model_pzn_rob <- lmrob(
  price ~ flat_area + flat_rooms + individual + flat_furnished +
    flat_for_students + flat_balcony,
  data = pzn_rent_subset,
  method = "MM"
)
summary(model_pzn_rob)
```

```{r}
plot(x = model_pzn_rob$model$price, y = model_pzn_rob$rweights, xlab = "Price", ylab = "Weight (lmrob)")
```

```{r}
data.frame(nonrobust = coef(model_pzn), robust = coef(model_pzn_rob))
```

Econometricians often use the "robust" term to describe standard errors that are robust to heteroskedasticity, temporal correlation, or clustering.
Statisticians often use the "robust" term to describe a model that is robust to influential observations.

Post-hoc sensitivity analysis:

- residual analysis -- mainly for standard errors but also for assumptions (e.g. linearity)
- influential observations analysis -- how sensitive are the estimated parameters
- omitted variable bias
- variable selection methods -- LASSO, FOCI
- selection bias


Sample selection models:

1. basic selection model: Heckman's model
2. more advanced econometric models: copula selection models
3. more statistical approach: not missing at random models

Model without selection bias (subset of people who are in the labour force)

```{r}
# Naive OLS wage equation estimated only on women in the labour force
# (lfp == 1) -- a self-selected subsample, hence the selection bias.
m_outcome <- lm(wage ~ educ, data = Mroz87, subset = lfp == 1)
summary(m_outcome)
```

```{r}
data(Mroz87)
# Heckman (Tobit-2) sample selection model, fitted by maximum likelihood:
# a probit labour-force-participation equation plus a wage outcome equation.
m <- selection(
  selection = lfp ~ educ + age + kids5 + kids618 + nwifeinc,
  outcome = wage ~ educ,
  data = Mroz87,
  method = "ml"
)
summary(m)
```




Generate sample data


```{r}
# Simulate data with a selection mechanism: r drives selection, and the two
# error terms are correlated, so OLS on the selected sample would be biased.
set.seed(123)
n <- 1000 
x1 <- rnorm(n = n, mean = 0, sd = 1)
x2 <- rnorm(n = n, mean = 0, sd = 1)
# empirical = TRUE rescales the draws so the *sample* covariance equals Sigma
errors <- MASS::mvrnorm(n = n, mu = c(0, 0), 
                        Sigma = matrix(c(1, 0.5, 0.5, 2), nrow = 2),
                        empirical = TRUE)

# x1 and x2 are stored in the data frame: the original left them in the
# global environment only, and selection() silently picked them up from
# there -- fragile if this chunk is ever run in a different scope.
selmodel <- data.frame(x1 = x1,
                       x2 = x2,
                       r = 1 + x1 + x2 + errors[, 1],
                       y = 1 + x1 + errors[, 2])

selmodel$sel <- selmodel$r > 0

selection(selection = sel ~ x1 + x2, outcome = y ~ x1, data = selmodel) |> 
  summary()
```


Spatial

```{r}
# County-level salaries and real-estate prices (GUS exports), renamed to
# English column names; dplyr::select is qualified because MASS masks it.
df_salaries <- read_excel("data/data-salaries.xlsx", sheet = 2) %>%
  dplyr::select(id = Kod, name = Nazwa, salaries = Wartosc)

df_real <- read_excel("data/data-real-estate.xlsx", sheet = 2) %>%
  dplyr::select(id = Kod, name = Nazwa, real = Wartosc)

# Join keys made explicit (the original relied on the implicit natural join
# and printed `Joining, by = c("id", "name")`); result is identical.
df_model <- df_salaries %>%
  inner_join(df_real, by = c("id", "name")) %>%
  filter(real > 0) %>%
  mutate(woj = substr(id, 1, 2),
         cities = str_detect(name, "m\\."),
         capitals = str_detect(name, "Białystok|Bydgoszcz|Gdańsk|Gorzów Wielkopolski|Katowice|Kielce|Kraków|Lublin|Łódź|Olsztyn|Opole|Poznań|Rzeszów|Szczecin|Toruń|Warszawa|Wrocław|Zielona Góra"))

df_model %>%
  filter(capitals)

```

```{r}
# Scatter on the log scale, plus the raw (untransformed) correlation
with(df_model, plot(log(salaries), log(real)))
with(df_model, cor(salaries, real))
```

```{r}
# Log-log model of real-estate prices with voivodeship fixed effects and
# city/capital indicators; residuals are kept for the map below.
model1 <- lm(
  log(real) ~ log(salaries) + woj + capitals + cities,
  data = df_model
)
df_model$resids <- resid(model1)
plot(model1)
summary(model1)
```

```{r}
# Read county (powiat) polygons and voivodeship boundaries, then overlay
# the thicker voivodeship borders on the county map.
powiats <- st_read(dsn = "data/mapy/powiaty.shp")
woj <- st_read(dsn = "data/mapy/woj.dbf")
plot(powiats$geometry)
plot(woj$geometry, add = TRUE, lwd = 2)
```

```{r}
# Attach model residuals to county polygons: df_model ids are 7-digit GUS
# codes, the shapefile uses 4-digit powiat codes, hence the substr().
# Left-assignment replaces the original `-> for_plot` (style-guide rule),
# and the join key is explicit instead of relying on the natural join.
for_plot <- powiats %>% 
  dplyr::select(id = jpt_kod_je) %>%
  left_join(df_model %>%
              mutate(id = substr(id, 1, 4)),
            by = "id")
```

```{r}
# Choropleth of model1 residuals by county: Jenks natural-breaks classes,
# diverging palette centred at 0, voivodeship borders drawn on top.
tm_shape(for_plot) +
  tm_polygons(col = "resids", style = "jenks", midpoint = 0) +
  tm_shape(woj) + 
  tm_borders(lwd = 2)
```

Generating correlated data

```{r}
# NOTE(review): attaching MASS masks dplyr::select; the call below uses
# MASS::mvrnorm explicitly, so the attach is kept only to mirror the
# original session -- consider dropping it.
library(MASS)
m <- 2
sigma <- diag(c(1, 2), 2, 2)
sigma[1, 2] <- sigma[2, 1] <- 0.5
# empirical = TRUE forces the *sample* covariance of the draws to equal
# sigma exactly, so cor(fake_data) is deterministic despite no seed.
fake_data <- MASS::mvrnorm(n = 2000, mu = rep(0, m), Sigma = sigma, empirical = TRUE)
cor(fake_data)
plot(fake_data)

```


